import os
import re
from pathlib import Path

def count_words(content):
    """Return how many countable characters *content* contains.

    A character is counted when it is one of:

    * a code point in U+4E00-U+9FFF, U+3000-U+303F or U+FF00-U+FFEF, or
    * an ASCII letter, or one of these ASCII punctuation marks: period,
      comma, question mark, exclamation mark, semicolon, colon, single
      quote, double quote.

    Everything else (digits, whitespace, other symbols) is ignored. Each
    match is exactly one character, so the result is a character count.
    """
    countable = re.compile(r'[\u4e00-\u9fff\u3000-\u303f\uff00-\uffef]|[a-zA-Z\.\,\?\!\;\:\'\"]')
    # Tally matches lazily instead of materialising the list of them.
    return sum(1 for _ in countable.finditer(content))

def main():
    """Scan the current directory tree and write a Markdown count report.

    Every regular file below the current working directory whose name
    matches the glob pattern used in the loop is read as UTF-8 and measured
    with ``count_words``. The report lists each file name with its count
    divided by 10000, ordered from largest to smallest, followed by two
    summary rows: the sum over all files, and the sum of the largest
    single-file count found in each directory. The report is written to
    ``report/word_count_report_<timestamp>.md`` and its path is printed.

    A file that raises while being read or counted is reported on stdout
    and skipped; it contributes to neither the rows nor the totals.
    """
    import datetime

    base_dir = Path('.')
    output_dir = base_dir / 'report'
    output_dir.mkdir(exist_ok=True)

    grand_total = 0
    best_in_dir = {}
    rows = []

    # Collect and measure the matching files.
    for path in base_dir.rglob('*-md优化-0000.md'):
        if not path.is_file():
            continue
        try:
            count = count_words(path.read_text(encoding='utf-8'))
            grand_total += count
            rows.append((path.name, count / 10000))

            # Remember only the largest count seen in each directory; these
            # maxima feed the second summary row.
            dir_key = str(path.parent)
            if count > best_in_dir.get(dir_key, 0):
                best_in_dir[dir_key] = count
        except Exception as e:
            print(f"读取文件 {path} 时出错: {e}")

    # Sum of the per-directory maxima, in units of 10000.
    precise_total = sum(best_in_dir.values()) / 10000

    # Assemble the report: heading and table header first.
    parts = [
        "# 字数统计报告\n\n",
        "| 文件名 | 字数（万字） |\n",
        "|--------|-------------|\n",
    ]

    # One table row per file, largest count first.
    ranked = sorted(rows, key=lambda row: row[1], reverse=True)
    parts.extend(f"| {name} | {wan:.2f} |\n" for name, wan in ranked)

    parts.append(f"| **总计** | **{grand_total / 10000:.2f}** |\n")
    parts.append(f"| **精确统计** | **{precise_total:.2f}** |\n")

    # Write the report to a timestamped file inside the report directory.
    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    report_path = output_dir / f'word_count_report_{stamp}.md'
    report_path.write_text("".join(parts), encoding='utf-8')
    print(f"报告已生成: {report_path}")

# Run the report only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()